#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 27 23:01:17 2018
@project: 天池比赛-A股主板上市公司公告信息抽取
@group: MZH_314
@author: LHQ
"""
import re
from array import array
import numpy as np


class Feature:
    """Regex-pattern feature extractor.

    Holds an ordered list of compiled regular expressions. Each pattern
    corresponds to one column of the 0/1 feature matrix produced by
    :meth:`build_feat_matrix`; column order follows insertion order.
    """

    def __init__(self):
        # Compiled patterns, in the order they were registered.
        self.patterns = []

    def add_pattern(self, regex: str):
        """Compile ``regex`` and append it as the next feature column.

        Args
        ----
        regex : str
            Regular expression source, compiled with ``re.compile``.

        Raises
        ------
        re.error
            If ``regex`` is not a valid regular expression.
        """
        self.patterns.append(re.compile(regex))

    def build_feat_matrix(self, raw_docs):
        """Build the 0/1 feature matrix for a collection of documents.

        Entry ``[i, j]`` is 1 when pattern ``j`` matches anywhere in
        document ``i`` (``Pattern.search``), and 0 otherwise.

        Args
        ----
        raw_docs : iterable
            Raw (un-tokenized) document strings. May be a one-shot
            iterator; it is consumed exactly once.

        Returns
        -------
        feature_matrix : numpy.ndarray
            Integer array of shape ``(n_docs, n_patterns)``. The result is
            always 2-D, including when ``raw_docs`` is empty or no patterns
            have been registered.
        """
        patterns = self.patterns
        rows = [
            [1 if p.search(doc) else 0 for p in patterns]
            for doc in raw_docs
        ]
        # An explicit dtype plus reshape keeps the output a 2-D integer
        # matrix even for zero documents, where np.array([]) alone would
        # yield a 1-D float64 array of shape (0,).
        feat_mat = np.array(rows, dtype=np.intc)
        return feat_mat.reshape(len(rows), len(patterns))
